#
# NOTE: Use the Makefiles to build this image correctly.
#

# base image - "<jupyter>" is only a placeholder default; a real image reference
# is expected through the BASE_IMG build arg (presumably set by the Makefiles)
ARG BASE_IMG=<jupyter>
FROM ${BASE_IMG}

# Content below is based on the scripts/Dockerfiles here:
# https://github.com/HabanaAI/Setup_and_Install/blob/1.19.0/dockerfiles/base/Dockerfile.ubuntu22.04
# https://github.com/HabanaAI/Setup_and_Install/blob/1.19.0/dockerfiles/pytorch/Dockerfile.ubuntu

# args - gaudi version
# GAUDI_VERSION and GAUDI_REVISION together select the Habana release used in this file:
#  - the habanalabs-* apt packages are pinned to "<version>-<revision>"
#  - the habana_media_loader pip package is pinned to "<version>.<revision>"
#  - both values are part of the pytorch modules download URL and install.sh arguments
ARG GAUDI_VERSION=1.19.2
ARG GAUDI_REVISION=32

# args - software versions
# see this support matrix for compatible versions:
# https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html
# AWS_EFA_VERSION: version of the aws-efa-installer tarball that is downloaded
ARG AWS_EFA_VERSION=1.34.0
# HCCL_OFI_WRAPPER_VERSION: HabanaAI/hccl_ofi_wrapper tag to build (the "v" prefix is added in the URL)
ARG HCCL_OFI_WRAPPER_VERSION=1.18.0
# LIBFABRIC_VERSION: ofiwg/libfabric release to build, also part of its install prefix
ARG LIBFABRIC_VERSION=1.22.0
# PYTORCH_VERSION: only used to build the name of the pytorch modules archive
ARG PYTORCH_VERSION=2.5.1

# Python is downgraded to 3.10 because Gaudi 1.19 does not currently support Python 3.11
# https://docs.habana.ai/en/latest/Support_Matrix/Support_Matrix.html
# NOTE: the "python ==" entry of the conda pin file is rewritten first,
#       so that it matches the version that is installed afterwards
ARG PYTHON_VERSION=3.10.16
RUN sed --in-place "s|python ==.*|python ==${PYTHON_VERSION}|" ${CONDA_DIR}/conda-meta/pinned \
 && conda install --yes --quiet \
    "python==${PYTHON_VERSION}" \
 && conda clean --all --force-pkgs-dirs --yes

# the system-level installs below are performed as root
USER root

# install - support libraries
# NOTE: the apt caches and package lists are removed in the same layer that created them
RUN export DEBIAN_FRONTEND=noninteractive \
 && apt-get --yes --quiet update \
 && apt-get --yes --quiet install --no-install-recommends \
    apt-utils \
    bc \
    build-essential \
    graphviz \
    iproute2 \
    libcairo2-dev \
    libgl1 \
    libglib2.0-dev \
    libgnutls30 \
    libgoogle-glog0v5 \
    libgoogle-perftools-dev \
    libhdf5-dev \
    libjemalloc2 \
    libjpeg-dev \
    liblapack-dev \
    libmkl-dev \
    libnuma-dev \
    libopenblas-dev \
    libpcre2-dev \
    libpq-dev \
    libselinux1-dev \
    lsof \
    moreutils \
    numactl \
    protobuf-compiler \
 && apt-get clean \
 && rm --recursive --force /var/lib/apt/lists/*

# config - preload tcmalloc
# NOTE(review): libtcmalloc.so.4 is presumably pulled in by libgoogle-perftools-dev above - confirm
ENV LD_PRELOAD="/lib/x86_64-linux-gnu/libtcmalloc.so.4"

# install - elastic fabric adapter
# NOTE: gpg works in a throwaway GNUPGHOME, so no root-owned keyring files are left in the user's HOME
# NOTE: the tarball signature is verified with gpg before the archive is unpacked;
#       the installer's ld.so.conf.d and profile.d files are deleted after it has run
RUN export GNUPGHOME=$(mktemp -d) \
 && EFA_URL="https://efa-installer.amazonaws.com" \
 && EFA_TGZ="/tmp/aws-efa-installer.tar.gz" \
 && curl -fsSL "${EFA_URL}/aws-efa-installer-${AWS_EFA_VERSION}.tar.gz" -o "${EFA_TGZ}" \
 && curl -fsSL "${EFA_URL}/aws-efa-installer-${AWS_EFA_VERSION}.tar.gz.sig" -o "${EFA_TGZ}.sig" \
 && curl -fsSL "${EFA_URL}/aws-efa-installer.key" | gpg --import \
 && gpg --verify "${EFA_TGZ}.sig" "${EFA_TGZ}" \
 && tar -xzf "${EFA_TGZ}" -C /tmp \
 && cd /tmp/aws-efa-installer \
 && export DEBIAN_FRONTEND=noninteractive \
 && apt-get -yq update \
 && ./efa_installer.sh -y --skip-kmod --skip-limit-conf --no-verify \
 && rm -rf /etc/ld.so.conf.d/efa.conf /etc/profile.d/efa.sh \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/* \
 && rm -rf "${EFA_TGZ}" "${EFA_TGZ}.sig" /tmp/aws-efa-installer "${GNUPGHOME}"

# config - fabric adapter and mpi variables
# NOTE: MPI_ROOT needs its own instruction, because an ENV instruction only
#       sees values that were set by earlier instructions
ENV MPI_ROOT=/opt/amazon/openmpi
# NOTE: LD_LIBRARY_PATH is assigned outright here (not extended)
ENV PATH=${MPI_ROOT}/bin:${PATH} \
    MPICC=${MPI_ROOT}/bin/mpicc \
    OPAL_PREFIX=${MPI_ROOT} \
    RDMAV_FORK_SAFE=1 \
    FI_EFA_USE_DEVICE_RDMA=1 \
    LD_LIBRARY_PATH=${MPI_ROOT}/lib

# install - habana packages
# NOTE: the repository key is stored de-armored in a dedicated keyring and referenced
#       through "signed-by" in the apt source entry; every habanalabs package
#       is pinned to the same "<version>-<revision>"
RUN HABANA_KEYRING="/usr/share/keyrings/habana-artifactory.gpg" \
 && HABANA_PKG_VERSION="${GAUDI_VERSION}-${GAUDI_REVISION}" \
 && curl -fsSL "https://vault.habana.ai/artifactory/api/gpg/key/public" | gpg --dearmor -o "${HABANA_KEYRING}" \
 && chown root:root "${HABANA_KEYRING}" \
 && chmod 644 "${HABANA_KEYRING}" \
 && echo "deb [signed-by=${HABANA_KEYRING}] https://vault.habana.ai/artifactory/debian jammy main" | tee /etc/apt/sources.list.d/habana.list \
 && export DEBIAN_FRONTEND=noninteractive \
 && apt-get update -yq \
 && apt-get install -yq --no-install-recommends \
    "habanalabs-firmware-tools=${HABANA_PKG_VERSION}" \
    "habanalabs-graph=${HABANA_PKG_VERSION}" \
    "habanalabs-rdma-core=${HABANA_PKG_VERSION}" \
    "habanalabs-thunk=${HABANA_PKG_VERSION}" \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*

# config - habana variables
# NOTE: RDMA_CORE_ROOT needs its own instruction, because RDMA_CORE_LIB is derived from it
ENV RDMA_CORE_ROOT=/opt/habanalabs/rdma-core/src
# NOTE: /usr/lib/habanalabs is put in front of the existing LD_LIBRARY_PATH
ENV RDMA_CORE_LIB=${RDMA_CORE_ROOT}/build/lib \
    GC_KERNEL_PATH=/usr/lib/habanalabs/libtpc_kernels.so \
    HABANA_LOGS=/var/log/habana_logs/ \
    HABANA_SCAL_BIN_PATH=/opt/habanalabs/engines_fw \
    HABANA_PLUGINS_LIB_PATH=/opt/habanalabs/habana_plugins \
    DATA_LOADER_AEON_LIB_PATH=/usr/lib/habanalabs/libaeon.so \
    LD_LIBRARY_PATH=/usr/lib/habanalabs:${LD_LIBRARY_PATH}

# install - libfabric
# NOTE: LIBFABRIC_ROOT is the install prefix of the build; it is reused below
#       when libfabric is added to LD_LIBRARY_PATH and PATH
ENV LIBFABRIC_ROOT="/opt/habanalabs/libfabric-${LIBFABRIC_VERSION}"
RUN curl -fsSL "https://github.com/ofiwg/libfabric/releases/download/v${LIBFABRIC_VERSION}/libfabric-${LIBFABRIC_VERSION}.tar.bz2" -o /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 \
 && tar xjf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 -C /tmp \
 && cd /tmp/libfabric-${LIBFABRIC_VERSION} \
 && ./configure --prefix=${LIBFABRIC_ROOT} --enable-psm3-verbs --enable-verbs=yes --with-synapseai=/usr \
    # limit the job count to the available CPUs: a bare `make -j` places no limit
    # on concurrent jobs, which can exhaust the memory of the build host
 && make -j "$(nproc)" \
 && make install \
    # leave the source tree before deleting it
 && cd / \
 && rm -rf /tmp/libfabric-${LIBFABRIC_VERSION}.tar.bz2 /tmp/libfabric-${LIBFABRIC_VERSION}

# config - add libfabric to loadable libraries
# NOTE: both paths are derived from LIBFABRIC_ROOT so they cannot drift from the install prefix
ENV LD_LIBRARY_PATH="${LIBFABRIC_ROOT}/lib:${LD_LIBRARY_PATH}"
ENV PATH="${LIBFABRIC_ROOT}/bin:${PATH}"

# install - hccl wrapper for ofi
# NOTE: the tag archive unpacks to "hccl_ofi_wrapper-<version>" (without the "v" of the tag name).
#       One variable holds that directory for both the build and the cleanup, so that
#       the source tree is really removed from the layer.
RUN HCCL_OFI_SRC="/tmp/hccl_ofi_wrapper-${HCCL_OFI_WRAPPER_VERSION}" \
 && curl -fsSL "https://github.com/HabanaAI/hccl_ofi_wrapper/archive/refs/tags/v${HCCL_OFI_WRAPPER_VERSION}.tar.gz" -o /tmp/hccl_ofi_wrapper.tar.gz \
 && tar xzf /tmp/hccl_ofi_wrapper.tar.gz -C /tmp \
 && cd "${HCCL_OFI_SRC}" \
 && make \
 && cp -f libhccl_ofi_wrapper.so /usr/lib/habanalabs/libhccl_ofi_wrapper.so \
    # leave the source tree before deleting it
 && cd / \
 && rm -rf /tmp/hccl_ofi_wrapper.tar.gz "${HCCL_OFI_SRC}" \
    # refresh the dynamic linker cache after the library was copied
 && /sbin/ldconfig

# config - make the habana modules importable from python
ENV PYTHONPATH="/usr/lib/habanalabs"

# the remaining steps run as the user identified by NB_UID instead of root
USER ${NB_UID}

# install - habana pytorch media modules
# NOTE: the package version is "<GAUDI_VERSION>.<GAUDI_REVISION>";
#       the full `--no-cache-dir` flag is spelled out instead of relying on
#       pip accepting the abbreviated `--no-cache`
RUN python3 -m pip install --quiet --no-cache-dir \
    habana_media_loader=="${GAUDI_VERSION}.${GAUDI_REVISION}"

# install - habana pytorch modules
# NOTE: the archive is unpacked into its own directory and installed with the bundled install.sh;
#       PIP_QUIET, PIP_NO_CACHE_DIR and PYTHON_VERSION=3 are set for that command only
#       (PYTHON_VERSION=3 overrides the build arg of the same name for install.sh)
RUN PT_MODULES_TGZ="/tmp/gaudi-pt-modules.tgz" \
 && PT_MODULES_DIR="/tmp/gaudi-pt-modules" \
 && curl -fsSL "https://vault.habana.ai/artifactory/gaudi-pt-modules/${GAUDI_VERSION}/${GAUDI_REVISION}/pytorch/ubuntu2204/pytorch_modules-v${PYTORCH_VERSION}_${GAUDI_VERSION}_${GAUDI_REVISION}.tgz" -o "${PT_MODULES_TGZ}" \
 && mkdir -p "${PT_MODULES_DIR}" \
 && tar xzf "${PT_MODULES_TGZ}" -C "${PT_MODULES_DIR}" \
 && cd "${PT_MODULES_DIR}" \
 && PIP_QUIET=1 PIP_NO_CACHE_DIR=1 PYTHON_VERSION=3 bash install.sh ${GAUDI_VERSION} ${GAUDI_REVISION} \
 && rm -rf "${PT_MODULES_TGZ}" "${PT_MODULES_DIR}"

# install - requirements.txt
# NOTE: the file is copied to /tmp for the install only and deleted afterwards
COPY --chown=${NB_USER}:users requirements.txt /tmp/
RUN python3 -m pip install --quiet --no-cache-dir --requirement /tmp/requirements.txt \
 && rm -f /tmp/requirements.txt

# home - pre-populate home with files for this image
# NOTE: the contents of the build context's home/ directory are copied into $HOME,
#       owned by ${NB_USER}:${NB_GID}
COPY --chown=${NB_USER}:${NB_GID} home/. ${HOME}/

# s6 - 01-copy-tmp-home
# NOTE: a PVC will be mounted at $HOME and hide whatever the image placed there,
#       so $HOME is duplicated to $HOME_TMP at build time and
#       the contents of $HOME_TMP are copied to $HOME at runtime
RUN cp --preserve=mode,ownership,timestamps --recursive --no-target-directory "${HOME}" "${HOME_TMP}" \
    # give group same access as user (needed for OpenShift)
 && chmod --recursive g=u "${HOME_TMP}"